Packages

library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(haven)
library(mice)
## 
## Attaching package: 'mice'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggpubr)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(flextable)
## Warning: package 'flextable' was built under R version 4.3.3
## 
## Attaching package: 'flextable'
## 
## The following objects are masked from 'package:ggpubr':
## 
##     border, font, rotate
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.3.3
library(sjmisc)
## Warning: package 'sjmisc' was built under R version 4.3.3
## Learn more about sjmisc with 'browseVignettes("sjmisc")'.
## 
## Attaching package: 'sjmisc'
## 
## The following object is masked from 'package:purrr':
## 
##     is_empty
## 
## The following object is masked from 'package:tidyr':
## 
##     replace_na
## 
## The following object is masked from 'package:tibble':
## 
##     add_case
library(sjlabelled)
## Warning: package 'sjlabelled' was built under R version 4.3.3
## 
## Attaching package: 'sjlabelled'
## 
## The following objects are masked from 'package:haven':
## 
##     as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
## 
## The following object is masked from 'package:forcats':
## 
##     as_factor
## 
## The following object is masked from 'package:dplyr':
## 
##     as_label
## 
## The following object is masked from 'package:ggplot2':
## 
##     as_label

Import databases

municipios <- read_xlsx("municipios.xlsx")

propiedad <- read_xlsx("propiedad.xlsx")

Descriptive of the Municipalities Database

To grasp a better understanding of the database a descriptive analysis of some of the variables is performed.

Housing Price Index

The Housing Price Index measures the change in housing prices, starting from a base of 100 set in 2015. The average of the General HPI, which reflects the price of all homes on the market, is 125, indicating a 25% price increase since 2015.

The boxplots show that the General HPI has several outliers with a much higher value than the average. We also see that this is repeated in the cases of the HPI for Second-hand housing and New housing. The General HPI has a value much closer to the Second-hand HPI, which indicates that there are more sales of second-hand homes than new construction homes in the market.

The Autonomous Communities where housing prices have increased the most are Madrid, Balearic Islands, Barcelona, Ceuta, and Melilla.

# IPV
mean(municipios$ipv2021General)
## [1] 126.4497
mean(municipios$ipv2011General)
## [1] 126.4662
ggplot(municipios, aes(x = 1, y = ipv2021General, fill = "ipv2021General")) +
  geom_boxplot(position = position_dodge(width = 0.8), fill = "Orange") +
  geom_boxplot(aes(x = 2, y = ipv2021Nueva, fill = "ipv2021Nueva"), position = position_dodge(width = 0.8), fill = "Blue") +
  geom_boxplot(aes(x = 3, y = ipv2021Segunda, fill = "ipv2021Segunda"), position = position_dodge(width = 0.8), fill = "Green") +
  scale_x_continuous(breaks = c(1, 2, 3), labels = c("ipv2021General", "ipv2021Nueva", "ipv2021Segunda")) +
  labs(y = "Índice de Precio de Viviendas",
       title = "Distribución del Índice de Precio de Viviendas por Tipo en 2021") +
  theme_minimal()

# top index value 2021
municipios |> 
  distinct(CCAA, .keep_all = T) |> 
  arrange(desc(ipv2021General)) |> 
  slice(1:5)
## # A tibble: 5 × 55
##   CPRO  CMUN  cusec MUN_LITERAL   PROV_LITERAL CCAA  ipv2021General ipv2021Nueva
##   <chr> <chr> <chr> <chr>         <chr>        <chr>          <dbl>        <dbl>
## 1 28    991   28991 <=2000        Madrid       Comu…           147.         152.
## 2 07    993   07993 5001 <= 10000 Balears, Il… Isla…           144.         153.
## 3 51    001   51001 Ceuta         Ceuta        Ceuta           142.         142.
## 4 08    994   08994 10001 <= 200… Barcelona    Cata…           142.         153.
## 5 52    001   52001 Melilla       Melilla      Meli…           140.         140.
## # ℹ 47 more variables: ipv2021Segunda <dbl>, ipv2011General <dbl>,
## #   ipv2011Nueva <dbl>, ipv2011Segunda <dbl>, ipvGeneralDif <dbl>,
## #   RentaMedia2021 <dbl>, PorcentajeJoven2011 <dbl>, PorcentajeJoven2021 <dbl>,
## #   PorcentajeJovenDif <dbl>, HABITAT <chr>, PorcentajeVacias2011 <dbl>,
## #   PorcentajeVacias2021 <dbl>, VaciasDif <dbl>,
## #   PorcentajeSecundarias2011 <dbl>, PorcentajeSecundarias2021 <dbl>,
## #   SecundariasDif <dbl>, PorcentajeTurísticas2021 <dbl>, Compra11Mun <dbl>, …

RentaMedia2021

Average disposable/net personal income. The average value for municipalities is €21,236 per year. However, there is a lot of variation between municipalities. The municipality with the highest income is Pozuelo de Alarcón with a value of €57,977, while the lowest income is Tudela in Navarra with €13,443.

mean(municipios$RentaMedia2021)
## [1] 21236.49
municipios |> 
  arrange(desc(RentaMedia2021)) |> 
  slice(1:3) |> 
  select(PROV_LITERAL, MUN_LITERAL, RentaMedia2021)
## # A tibble: 3 × 3
##   PROV_LITERAL MUN_LITERAL        RentaMedia2021
##   <chr>        <chr>                       <dbl>
## 1 Madrid       Pozuelo de Alarcón          57977
## 2 Madrid       Boadilla del Monte          46480
## 3 Madrid       Alcobendas                  43720
municipios |> 
  arrange(RentaMedia2021) |> 
  slice(1:3) |> 
  select(PROV_LITERAL, MUN_LITERAL, RentaMedia2021)
## # A tibble: 3 × 3
##   PROV_LITERAL MUN_LITERAL   RentaMedia2021
##   <chr>        <chr>                  <dbl>
## 1 Navarra      Tudela                 13443
## 2 Navarra      2001 <= 5000           13580
## 3 Navarra      5001 <= 10000          13771

PorcentajeJoven

The average percentage of young population in municipalities in 2021 is 36%, while in 2011 it was 39%. The number of young people has decreased.

The municipaliy with the highest young population is Vícar in Almería, probably due to the high presence of young agriculture workers.

mean(municipios$PorcentajeJoven2021)
## [1] 36.77195
mean(municipios$PorcentajeJoven2011)
## [1] 39.9888
municipios |> 
  arrange(desc(PorcentajeJoven2021)) |> 
  slice(1:3) |> 
  select(PROV_LITERAL, MUN_LITERAL, PorcentajeJoven2021)
## # A tibble: 3 × 3
##   PROV_LITERAL MUN_LITERAL PorcentajeJoven2021
##   <chr>        <chr>                     <dbl>
## 1 Almería      Vícar                      51.1
## 2 Melilla      Melilla                    50.5
## 3 Almería      Níjar                      49.6
municipios |> 
  arrange(PorcentajeJoven2021) |> 
  slice(1:3) |> 
  select(PROV_LITERAL, MUN_LITERAL, PorcentajeJoven2021)
## # A tibble: 3 × 3
##   PROV_LITERAL MUN_LITERAL PorcentajeJoven2021
##   <chr>        <chr>                     <dbl>
## 1 Ourense      <=2000                     19.9
## 2 León         <=2000                     20.8
## 3 Lugo         <=2000                     21.0

#- # Descriptive Tenure Regimes

In 2011

Per municipality

# Ownership
top_propiedad_2011 <- propiedad %>%
  arrange(desc(Propiedad11Mun)) %>%
  distinct(Propiedad11Mun, .keep_all = TRUE) |> 
  select(Propiedad11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

# Purchase
top_compra_2011 <- propiedad %>%
  arrange(desc(Compra11Mun)) %>%
  distinct(Compra11Mun, .keep_all = TRUE) |> 
  select(Compra11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

# Inheritance
top_herencia_2011 <- propiedad %>%
  arrange(desc(Herencia11Mun)) %>%
  distinct(cusec, .keep_all = TRUE) |> 
  select(Herencia11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

# Rent
top_alquiler_2011 <- propiedad %>%
  arrange(desc(Alquiler11Mun)) %>%
  distinct(cusec, .keep_all = TRUE) |> 
  select(Alquiler11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

# Other
top_otro_2011 <- propiedad %>%
  arrange(desc(Otro11Mun)) %>%
  distinct(cusec, .keep_all = TRUE) |> 
  select(Otro11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

top_propiedad_2011
## # A tibble: 531 × 4
##    Propiedad11Mun MUN_LITERAL        PROV_LITERAL       CCAA                
##             <dbl> <chr>              <chr>              <chr>               
##  1           93.9 Almansa            Albacete           Castilla-La Mancha  
##  2           92.5 2001 <= 5000       Zamora             Castilla y León     
##  3           91.8 Petrer             Alicante/Alacant   Comunidad Valenciana
##  4           91.7 Martos             Jaén               Andalucía           
##  5           91.1 Ibi                Alicante/Alacant   Comunidad Valenciana
##  6           91.0 Vall d'Uixó, la    Castellón/Castelló Comunidad Valenciana
##  7           90.8 Arroyomolinos      Madrid             Comunidad de Madrid 
##  8           90.6 Paiporta           Valencia/València  Comunidad Valenciana
##  9           90.3 Riba-roja de Túria Valencia/València  Comunidad Valenciana
## 10           90.2 Puerto Real        Cádiz              Andalucía           
## # ℹ 521 more rows
top_compra_2011
## # A tibble: 531 × 4
##    Compra11Mun MUN_LITERAL          PROV_LITERAL      CCAA                
##          <dbl> <chr>                <chr>             <chr>               
##  1        92.5 2001 <= 5000         Zamora            Castilla y León     
##  2        90.8 Arroyomolinos        Madrid            Comunidad de Madrid 
##  3        90.8 Almansa              Albacete          Castilla-La Mancha  
##  4        90.1 Petrer               Alicante/Alacant  Comunidad Valenciana
##  5        90   Alcalá de Guadaíra   Sevilla           Andalucía           
##  6        89.7 Paiporta             Valencia/València Comunidad Valenciana
##  7        89.5 Martos               Jaén              Andalucía           
##  8        89.3 Morón de la Frontera Sevilla           Andalucía           
##  9        89.3 Dos Hermanas         Sevilla           Andalucía           
## 10        88.6 Ibi                  Alicante/Alacant  Comunidad Valenciana
## # ℹ 521 more rows
top_herencia_2011
## # A tibble: 584 × 4
##    Herencia11Mun MUN_LITERAL        PROV_LITERAL           CCAA              
##            <dbl> <chr>              <chr>                  <chr>             
##  1          40   2001 <= 5000       Palmas, Las            Canarias          
##  2          33.3 <=2000             Palmas, Las            Canarias          
##  3          25   <=2000             Santa Cruz de Tenerife Canarias          
##  4          17.9 <=2000             Pontevedra             Galicia           
##  5          17.8 <=2000             Ourense                Galicia           
##  6          17.1 Icod de los Vinos  Santa Cruz de Tenerife Canarias          
##  7          13.8 Alhaurín el Grande Málaga                 Andalucía         
##  8          12.9 5001 <= 10000      Albacete               Castilla-La Mancha
##  9          12.4 Redondela          Pontevedra             Galicia           
## 10          12.3 <=2000             Lugo                   Galicia           
## # ℹ 574 more rows
top_alquiler_2011
## # A tibble: 584 × 4
##    Alquiler11Mun MUN_LITERAL            PROV_LITERAL   CCAA               
##            <dbl> <chr>                  <chr>          <chr>              
##  1          53.1 Sitges                 Barcelona      Cataluña           
##  2          52.3 Barcelona              Barcelona      Cataluña           
##  3          49.7 Sant Cugat del Vallès  Barcelona      Cataluña           
##  4          48.8 Girona                 Girona         Cataluña           
##  5          48.4 Maó                    Balears, Illes Islas Baleares     
##  6          48.2 Mogán                  Palmas, Las    Canarias           
##  7          47.4 Santiago de Compostela Coruña, A      Galicia            
##  8          46.7 Lloret de Mar          Girona         Cataluña           
##  9          46.4 Eivissa                Balears, Illes Islas Baleares     
## 10          44.2 Madrid                 Madrid         Comunidad de Madrid
## # ℹ 574 more rows
top_otro_2011
## # A tibble: 584 × 4
##    Otro11Mun MUN_LITERAL PROV_LITERAL           CCAA       
##        <dbl> <chr>       <chr>                  <chr>      
##  1      33.3 <=2000      Palmas, Las            Canarias   
##  2      31.8 <=2000      Santa Cruz de Tenerife Canarias   
##  3      29.6 <=2000      Málaga                 Andalucía  
##  4      28.6 <=2000      Pontevedra             Galicia    
##  5      27.8 <=2000      Cáceres                Extremadura
##  6      27.6 <=2000      Huelva                 Andalucía  
##  7      25.4 <=2000      Lugo                   Galicia    
##  8      24.7 <=2000      Jaén                   Andalucía  
##  9      23.8 <=2000      Badajoz                Extremadura
## 10      23.1 <=2000      Coruña, A              Galicia    
## # ℹ 574 more rows

Per province

# Ownership
top_propiedad_2011 <- propiedad |> 
    distinct(CPRO, .keep_all = TRUE) |> 
    arrange(desc(Propiedad11Prov)) |> 
    select(Herencia11Prov, PROV_LITERAL, CCAA)


# Inheritance
top_herencia_2011 <- propiedad |> 
    distinct(CPRO, .keep_all = TRUE) |> 
    arrange(desc(Herencia11Prov)) |> 
    select(Herencia11Prov, PROV_LITERAL, CCAA)

# Rent
top_alquiler_2011 <- propiedad %>%
  arrange(desc(Alquiler11Prov)) %>%
  distinct(CPRO, .keep_all = TRUE) |> 
  select(Alquiler11Prov, PROV_LITERAL, CCAA)

# Other
top_otro_2011 <- propiedad %>%
  arrange(desc(Otro11Prov)) %>%
  distinct(CPRO, .keep_all = TRUE) |> 
  select(Otro11Prov, PROV_LITERAL, CCAA)

top_compra_2011
## # A tibble: 531 × 4
##    Compra11Mun MUN_LITERAL          PROV_LITERAL      CCAA                
##          <dbl> <chr>                <chr>             <chr>               
##  1        92.5 2001 <= 5000         Zamora            Castilla y León     
##  2        90.8 Arroyomolinos        Madrid            Comunidad de Madrid 
##  3        90.8 Almansa              Albacete          Castilla-La Mancha  
##  4        90.1 Petrer               Alicante/Alacant  Comunidad Valenciana
##  5        90   Alcalá de Guadaíra   Sevilla           Andalucía           
##  6        89.7 Paiporta             Valencia/València Comunidad Valenciana
##  7        89.5 Martos               Jaén              Andalucía           
##  8        89.3 Morón de la Frontera Sevilla           Andalucía           
##  9        89.3 Dos Hermanas         Sevilla           Andalucía           
## 10        88.6 Ibi                  Alicante/Alacant  Comunidad Valenciana
## # ℹ 521 more rows
top_herencia_2011
## # A tibble: 52 × 3
##    Herencia11Prov PROV_LITERAL           CCAA              
##             <dbl> <chr>                  <chr>             
##  1           8.08 Ourense                Galicia           
##  2           5.76 Pontevedra             Galicia           
##  3           5.73 Jaén                   Andalucía         
##  4           5.71 Cuenca                 Castilla-La Mancha
##  5           5.55 Lugo                   Galicia           
##  6           5.54 Teruel                 Aragón            
##  7           5.38 Cáceres                Extremadura       
##  8           5.28 Santa Cruz de Tenerife Canarias          
##  9           4.55 Albacete               Castilla-La Mancha
## 10           4.55 Badajoz                Extremadura       
## # ℹ 42 more rows
top_alquiler_2011
## # A tibble: 52 × 3
##    Alquiler11Prov PROV_LITERAL   CCAA               
##             <dbl> <chr>          <chr>              
##  1           40.4 Melilla        Melilla            
##  2           32.6 Girona         Cataluña           
##  3           31.4 Balears, Illes Islas Baleares     
##  4           31.0 Barcelona      Cataluña           
##  5           30.7 Madrid         Comunidad de Madrid
##  6           30.3 Teruel         Aragón             
##  7           27.5 Lleida         Cataluña           
##  8           26.4 Palmas, Las    Canarias           
##  9           26.4 Ceuta          Ceuta              
## 10           25.6 Huesca         Aragón             
## # ℹ 42 more rows
top_otro_2011
## # A tibble: 52 × 3
##    Otro11Prov PROV_LITERAL CCAA           
##         <dbl> <chr>        <chr>          
##  1       19.5 Cáceres      Extremadura    
##  2       15.1 Jaén         Andalucía      
##  3       15.1 Ourense      Galicia        
##  4       14.9 Pontevedra   Galicia        
##  5       14.9 Lugo         Galicia        
##  6       14.8 Zamora       Castilla y León
##  7       14.6 Badajoz      Extremadura    
##  8       14.6 Ceuta        Ceuta          
##  9       14.2 Teruel       Aragón         
## 10       14.2 Ávila        Castilla y León
## # ℹ 42 more rows

Per CCAA

# Ownership
top_propiedad_2011 <- propiedad |> 
    distinct(CPRO, .keep_all = TRUE) |> 
    arrange(desc(Propiedad11CCAA)) |> 
    select(Herencia11Prov, PROV_LITERAL, CCAA)

# Inheritance
top_herencia_2011 <- propiedad |> 
    distinct(CCAA, .keep_all = TRUE) |> 
    arrange(desc(Herencia11CCAA)) |> 
    select(Herencia11CCAA, CCAA)

# Rent
top_alquiler_2011 <- propiedad %>%
  arrange(desc(Alquiler11CCAA)) %>%
  distinct(CCAA, .keep_all = TRUE) |> 
  select(Alquiler11CCAA, CCAA)

# Other
top_otro_2011 <- propiedad %>%
  arrange(desc(Otro11CCAA)) %>%
  distinct(CCAA, .keep_all = TRUE) |> 
  select(Otro11CCAA, CCAA)

top_propiedad_2011
## # A tibble: 52 × 3
##    Herencia11Prov PROV_LITERAL       CCAA                
##             <dbl> <chr>              <chr>               
##  1           2.88 Alicante/Alacant   Comunidad Valenciana
##  2           4.33 Castellón/Castelló Comunidad Valenciana
##  3           3.58 Valencia/València  Comunidad Valenciana
##  4           1.93 Cantabria          Cantabria           
##  5           3.12 Murcia             Comunidad de Murcia 
##  6           1.26 Araba/Álava        País Vasco          
##  7           2.54 Gipuzkoa           País Vasco          
##  8           2.6  Bizkaia            País Vasco          
##  9           2.85 Almería            Andalucía           
## 10           2.91 Cádiz              Andalucía           
## # ℹ 42 more rows
top_compra_2011
## # A tibble: 531 × 4
##    Compra11Mun MUN_LITERAL          PROV_LITERAL      CCAA                
##          <dbl> <chr>                <chr>             <chr>               
##  1        92.5 2001 <= 5000         Zamora            Castilla y León     
##  2        90.8 Arroyomolinos        Madrid            Comunidad de Madrid 
##  3        90.8 Almansa              Albacete          Castilla-La Mancha  
##  4        90.1 Petrer               Alicante/Alacant  Comunidad Valenciana
##  5        90   Alcalá de Guadaíra   Sevilla           Andalucía           
##  6        89.7 Paiporta             Valencia/València Comunidad Valenciana
##  7        89.5 Martos               Jaén              Andalucía           
##  8        89.3 Morón de la Frontera Sevilla           Andalucía           
##  9        89.3 Dos Hermanas         Sevilla           Andalucía           
## 10        88.6 Ibi                  Alicante/Alacant  Comunidad Valenciana
## # ℹ 521 more rows
top_herencia_2011
## # A tibble: 19 × 2
##    Herencia11CCAA CCAA                      
##             <dbl> <chr>                     
##  1           5.35 Galicia                   
##  2           4.88 Extremadura               
##  3           4.07 Canarias                  
##  4           3.64 Islas Baleares            
##  5           3.57 Andalucía                 
##  6           3.46 Comunidad Valenciana      
##  7           3.12 Comunidad de Murcia       
##  8           3.07 Asturias                  
##  9           3.02 Castilla-La Mancha        
## 10           3    Aragón                    
## 11           2.74 Comunidad Foral de Navarra
## 12           2.38 Castilla y León           
## 13           2.32 País Vasco                
## 14           1.93 Cantabria                 
## 15           1.9  Cataluña                  
## 16           1.62 La Rioja                  
## 17           1.52 Ceuta                     
## 18           1.39 Melilla                   
## 19           1.26 Comunidad de Madrid
top_alquiler_2011
## # A tibble: 19 × 2
##    Alquiler11CCAA CCAA                      
##             <dbl> <chr>                     
##  1           40.4 Melilla                   
##  2           31.4 Islas Baleares            
##  3           30.7 Comunidad de Madrid       
##  4           29.8 Cataluña                  
##  5           26.4 Ceuta                     
##  6           25.4 Canarias                  
##  7           25.3 Asturias                  
##  8           23.8 Galicia                   
##  9           22.7 Aragón                    
## 10           19.5 La Rioja                  
## 11           17.0 Castilla y León           
## 12           16.9 Comunidad Foral de Navarra
## 13           16.9 País Vasco                
## 14           16.7 Castilla-La Mancha        
## 15           15.6 Comunidad de Murcia       
## 16           14.4 Cantabria                 
## 17           14.2 Comunidad Valenciana      
## 18           13.5 Extremadura               
## 19           12.7 Andalucía
top_otro_2011
## # A tibble: 19 × 2
##    Otro11CCAA CCAA                      
##         <dbl> <chr>                     
##  1      16.5  Extremadura               
##  2      14.6  Ceuta                     
##  3      14.3  Galicia                   
##  4      11.6  Canarias                  
##  5      11.4  Andalucía                 
##  6      11.0  Castilla y León           
##  7      10.5  Islas Baleares            
##  8       9.77 Aragón                    
##  9       9.74 Asturias                  
## 10       9.16 Castilla-La Mancha        
## 11       8.34 Cantabria                 
## 12       8.11 Comunidad Valenciana      
## 13       7.88 Comunidad Foral de Navarra
## 14       7.74 La Rioja                  
## 15       7.28 Comunidad de Murcia       
## 16       7.24 Melilla                   
## 17       6.99 Cataluña                  
## 18       6.66 País Vasco                
## 19       6.15 Comunidad de Madrid

In 2021

Per municipality

# Ownership
top_propiedad_2021 <- propiedad |> 
    distinct(cusec, .keep_all = TRUE) |> 
    arrange(desc(Propiedad21Mun)) |> 
    select(Propiedad21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

# Rent
top_alquiler_2021 <- propiedad |> 
    distinct(cusec, .keep_all = TRUE) |> 
    arrange(desc(Alquiler21Mun)) |> 
    select(Alquiler21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

# Other
top_otro_2021 <- propiedad |> 
    distinct(cusec, .keep_all = TRUE) |> 
    arrange(desc(Otro21Mun)) |> 
    select(Propiedad21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

top_propiedad_2021
## # A tibble: 584 × 4
##    Propiedad21Mun MUN_LITERAL          PROV_LITERAL CCAA           
##             <dbl> <chr>                <chr>        <chr>          
##  1           90.9 Martos               Jaén         Andalucía      
##  2           87.0 Andújar              Jaén         Andalucía      
##  3           86.7 5001 <= 10000        Burgos       Castilla y León
##  4           83.9 2001 <= 5000         Salamanca    Castilla y León
##  5           81.6 Lebrija              Sevilla      Andalucía      
##  6           81.2 2001 <= 5000         Zamora       Castilla y León
##  7           80.1 <=2000               Jaén         Andalucía      
##  8           80   Úbeda                Jaén         Andalucía      
##  9           79.5 10001 <= 20000       Jaén         Andalucía      
## 10           79.1 Alhaurín de la Torre Málaga       Andalucía      
## # ℹ 574 more rows
top_alquiler_2021
## # A tibble: 584 × 4
##    Alquiler21Mun MUN_LITERAL                 PROV_LITERAL CCAA               
##            <dbl> <chr>                       <chr>        <chr>              
##  1          57.9 Santiago de Compostela      Coruña, A    Galicia            
##  2          57.7 Girona                      Girona       Cataluña           
##  3          56.5 Barcelona                   Barcelona    Cataluña           
##  4          54.5 Vilanova i la Geltrú        Barcelona    Cataluña           
##  5          53.6 Manresa                     Barcelona    Cataluña           
##  6          52.9 Ponferrada                  León         Castilla y León    
##  7          52.8 Granollers                  Barcelona    Cataluña           
##  8          52.7 Hospitalet de Llobregat, L' Barcelona    Cataluña           
##  9          52.3 Madrid                      Madrid       Comunidad de Madrid
## 10          51.7 Mollet del Vallès           Barcelona    Cataluña           
## # ℹ 574 more rows
top_otro_2021
## # A tibble: 584 × 4
##    Propiedad21Mun MUN_LITERAL    PROV_LITERAL           CCAA          
##             <dbl> <chr>          <chr>                  <chr>         
##  1           20   2001 <= 5000   Palmas, Las            Canarias      
##  2           23.5 <=2000         Palmas, Las            Canarias      
##  3           25.6 <=2000         Santa Cruz de Tenerife Canarias      
##  4           44.4 5001 <= 10000  Teruel                 Aragón        
##  5           43.8 <=2000         Pontevedra             Galicia       
##  6           40.8 Oleiros        Coruña, A              Galicia       
##  7           54.2 10001 <= 20000 Pontevedra             Galicia       
##  8           39.3 Teguise        Palmas, Las            Canarias      
##  9           60.1 2001 <= 5000   Pontevedra             Galicia       
## 10           41.2 <=2000         Balears, Illes         Islas Baleares
## # ℹ 574 more rows

Per province

# Ownership
top_propiedad_2021 <- propiedad |> 
    distinct(CPRO, .keep_all = TRUE) |> 
    arrange(desc(Propiedad21Prov)) |> 
    select(Propiedad21Prov, PROV_LITERAL, CCAA)

# Rent
top_alquiler_2021 <- propiedad |> 
    distinct(CPRO, .keep_all = TRUE) |> 
    arrange(desc(Alquiler21Prov)) |> 
    select(Alquiler21Prov, PROV_LITERAL, CCAA)

# Other
top_otro_2021 <- propiedad |> 
    distinct(CPRO, .keep_all = TRUE) |> 
    arrange(desc(Otro21Prov)) |> 
    select(Propiedad21Mun, PROV_LITERAL, CCAA)

top_propiedad_2021
## # A tibble: 52 × 3
##    Propiedad21Prov PROV_LITERAL CCAA               
##              <dbl> <chr>        <chr>              
##  1            77.0 Jaén         Andalucía          
##  2            69.2 Málaga       Andalucía          
##  3            68.2 Córdoba      Andalucía          
##  4            68   Cádiz        Andalucía          
##  5            68.0 Sevilla      Andalucía          
##  6            67.9 Ciudad Real  Castilla-La Mancha 
##  7            67.3 Huelva       Andalucía          
##  8            66   Murcia       Comunidad de Murcia
##  9            65.4 Albacete     Castilla-La Mancha 
## 10            64.1 Badajoz      Extremadura        
## # ℹ 42 more rows
top_alquiler_2021
## # A tibble: 52 × 3
##    Alquiler21Prov PROV_LITERAL   CCAA               
##             <dbl> <chr>          <chr>              
##  1           46.4 Melilla        Melilla            
##  2           46.0 Barcelona      Cataluña           
##  3           45.4 Madrid         Comunidad de Madrid
##  4           42.6 Girona         Cataluña           
##  5           41.2 Araba/Álava    País Vasco         
##  6           39.9 Lleida         Cataluña           
##  7           38.2 Gipuzkoa       País Vasco         
##  8           38.0 Balears, Illes Islas Baleares     
##  9           36.8 Zaragoza       Aragón             
## 10           36.8 Bizkaia        País Vasco         
## # ℹ 42 more rows
top_otro_2021
## # A tibble: 52 × 3
##    Propiedad21Mun PROV_LITERAL CCAA       
##             <dbl> <chr>        <chr>      
##  1           65.3 Badajoz      Extremadura
##  2           60.1 Pontevedra   Galicia    
##  3           56.8 Teruel       Aragón     
##  4           61.7 Coruña, A    Galicia    
##  5           44.2 Palmas, Las  Canarias   
##  6           71.3 Ourense      Galicia    
##  7           67.7 Lugo         Galicia    
##  8           58.5 Asturias     Asturias   
##  9           63.7 Córdoba      Andalucía  
## 10           69.3 Cantabria    Cantabria  
## # ℹ 42 more rows

Per CCAA

# Ownership
top_propiedad_2021 <- propiedad |> 
    distinct(CCAA, .keep_all = TRUE) |> 
    arrange(desc(Propiedad21CCAA)) |> 
    select(Propiedad21CCAA, CCAA)

# Rent
top_propiedad_2021 <- propiedad |> 
    distinct(CCAA, .keep_all = TRUE) |> 
    arrange(desc(Alquiler21CCAA)) |> 
    select(Alquiler21CCAA, CCAA)

# Other
top_propiedad_2021 <- propiedad |> 
    distinct(CCAA, .keep_all = TRUE) |> 
    arrange(desc(Otro21CCAA)) |> 
    select(Otro21CCAA, CCAA)

top_propiedad_2021
## # A tibble: 19 × 2
##    Otro21CCAA CCAA                      
##         <dbl> <chr>                     
##  1      20.5  Galicia                   
##  2      20.1  Extremadura               
##  3      18.2  Asturias                  
##  4      17.9  Canarias                  
##  5      17.1  Cantabria                 
##  6      15.4  Ceuta                     
##  7      14.4  Castilla y León           
##  8      14.1  Islas Baleares            
##  9      14.0  Andalucía                 
## 10      13.9  Comunidad Foral de Navarra
## 11      13.7  Castilla-La Mancha        
## 12      11.9  Comunidad Valenciana      
## 13      11.8  Aragón                    
## 14      11.4  Cataluña                  
## 15      11.2  La Rioja                  
## 16      10.0  Comunidad de Murcia       
## 17       7.75 Comunidad de Madrid       
## 18       6.8  Melilla                   
## 19       4.96 País Vasco
top_alquiler_2021
## # A tibble: 52 × 3
##    Alquiler21Prov PROV_LITERAL   CCAA               
##             <dbl> <chr>          <chr>              
##  1           46.4 Melilla        Melilla            
##  2           46.0 Barcelona      Cataluña           
##  3           45.4 Madrid         Comunidad de Madrid
##  4           42.6 Girona         Cataluña           
##  5           41.2 Araba/Álava    País Vasco         
##  6           39.9 Lleida         Cataluña           
##  7           38.2 Gipuzkoa       País Vasco         
##  8           38.0 Balears, Illes Islas Baleares     
##  9           36.8 Zaragoza       Aragón             
## 10           36.8 Bizkaia        País Vasco         
## # ℹ 42 more rows
top_otro_2021
## # A tibble: 52 × 3
##    Propiedad21Mun PROV_LITERAL CCAA       
##             <dbl> <chr>        <chr>      
##  1           65.3 Badajoz      Extremadura
##  2           60.1 Pontevedra   Galicia    
##  3           56.8 Teruel       Aragón     
##  4           61.7 Coruña, A    Galicia    
##  5           44.2 Palmas, Las  Canarias   
##  6           71.3 Ourense      Galicia    
##  7           67.7 Lugo         Galicia    
##  8           58.5 Asturias     Asturias   
##  9           63.7 Córdoba      Andalucía  
## 10           69.3 Cantabria    Cantabria  
## # ℹ 42 more rows

Difference

Per municipality

# Propiedad
top_propiedad_dif <- propiedad |> 
    distinct(cusec, .keep_all = TRUE) |> 
    arrange(desc(PropiedadDifMun)) |> 
  select(MUN_LITERAL, PROV_LITERAL, PropiedadDifMun, AlquilerDifMun, OtroDifMun, Herencia11Mun) |> 
  filter(PropiedadDifMun > 5)

# Alquiler
top_alquiler_dif <- propiedad |> 
    distinct(cusec, .keep_all = TRUE) |> 
    arrange(desc(AlquilerDifMun)) |> 
    select(Alquiler21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

# Otro
top_otro_dif <- propiedad |> 
    distinct(cusec, .keep_all = TRUE) |> 
    arrange(desc(OtroDifMun)) |> 
    select(Propiedad21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)

top_propiedad_dif
## # A tibble: 23 × 6
##    MUN_LITERAL            PROV_LITERAL PropiedadDifMun AlquilerDifMun OtroDifMun
##    <chr>                  <chr>                  <dbl>          <dbl>      <dbl>
##  1 Nerja                  Málaga                 25.8          -28         2.23 
##  2 <=2000                 Jaén                   13.8           -1.24    -12.5  
##  3 Úbeda                  Jaén                   13.5          -10.4      -3.15 
##  4 2001 <= 5000           Huesca                 11.6          -14.2       2.62 
##  5 Alhama de Murcia       Murcia                 11.4           -7        -4.43 
##  6 5001 <= 10000          Burgos                  9.83         -12.9       3.07 
##  7 <=2000                 Lugo                    9.78         -10.2       0.450
##  8 <=2000                 Murcia                  9.27           4.85    -14.1  
##  9 Sant Josep de sa Tala… Balears, Il…            9.2           -8.54     -0.65 
## 10 Maó                    Balears, Il…            8.32          -5.82     -2.51 
## # ℹ 13 more rows
## # ℹ 1 more variable: Herencia11Mun <dbl>
top_alquiler_dif
## # A tibble: 584 × 4
##    Alquiler21Mun MUN_LITERAL        PROV_LITERAL      CCAA                
##            <dbl> <chr>              <chr>             <chr>               
##  1          47.0 2001 <= 5000       Araba/Álava       País Vasco          
##  2          43.2 <=2000             Araba/Álava       País Vasco          
##  3          44.6 Alfafar            Valencia/València Comunidad Valenciana
##  4          51.7 Mollet del Vallès  Barcelona         Cataluña            
##  5          37.0 Picassent          Valencia/València Comunidad Valenciana
##  6          36.5 Catarroja          Valencia/València Comunidad Valenciana
##  7          52.9 Ponferrada         León              Castilla y León     
##  8          35.8 Alaquàs            Valencia/València Comunidad Valenciana
##  9          43.1 Valdemoro          Madrid            Comunidad de Madrid 
## 10          37.8 Barberà del Vallès Barcelona         Cataluña            
## # ℹ 574 more rows
top_otro_dif
## # A tibble: 584 × 4
##    Propiedad21Mun MUN_LITERAL             PROV_LITERAL           CCAA           
##             <dbl> <chr>                   <chr>                  <chr>          
##  1           20   2001 <= 5000            Palmas, Las            Canarias       
##  2           39.3 Teguise                 Palmas, Las            Canarias       
##  3           40.8 Oleiros                 Coruña, A              Galicia        
##  4           44.4 5001 <= 10000           Teruel                 Aragón         
##  5           23.5 <=2000                  Palmas, Las            Canarias       
##  6           25.6 <=2000                  Santa Cruz de Tenerife Canarias       
##  7           63.1 Don Benito              Badajoz                Extremadura    
##  8           81.2 2001 <= 5000            Zamora                 Castilla y León
##  9           64.0 Villanueva de la Serena Badajoz                Extremadura    
## 10           58.5 Estrada, A              Pontevedra             Galicia        
## # ℹ 574 more rows
# Save top dif table
ft <- flextable(top_propiedad_dif)
ft <- autofit(ft)
save_as_image(x = ft, path = paste0(getwd(), "/tablatopdif.png"))
## [1] "C:/Users/alici/OneDrive/Documentos/Clase/Máster/Bcol/tablatopdif.png"
# Save bot dif table
bot_propiedad_dif <- propiedad |> 
    distinct(cusec, .keep_all = TRUE) |> 
    arrange(PropiedadDifMun) |> 
  select(MUN_LITERAL, PROV_LITERAL, PropiedadDifMun, AlquilerDifMun, OtroDifMun) |> 
  filter(PropiedadDifMun < -28)

ft <- flextable(bot_propiedad_dif)
ft <- autofit(ft)
save_as_image(x = ft, path = paste0(getwd(), "/tablabotdif.png"))
## [1] "C:/Users/alici/OneDrive/Documentos/Clase/Máster/Bcol/tablabotdif.png"
# Graphs

ggplot(top_propiedad_dif, aes(x = reorder(MUN_LITERAL, PropiedadDifMun), y = PropiedadDifMun, fill = "Propiedad")) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_bar(aes(y = AlquilerDifMun, fill = "Alquiler"), stat = "identity", position = "dodge") +
  geom_bar(aes(y = OtroDifMun, fill = "Otro"), stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("Propiedad" = "skyblue2", "Alquiler" = "green3", "Otro" = "red2")) +
  labs(title = "Cambios en Propiedad, Alquiler y Otro por Municipio",
       x = "Municipio",
       y = "Cambios",
       fill = "Tipo de Cambio") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = guide_legend(title = "Tipo de Cambio")) +
  coord_flip()+
  theme_minimal()

ggplot(bot_propiedad_dif, aes(x = reorder(MUN_LITERAL, PropiedadDifMun), y = PropiedadDifMun, fill = "Propiedad")) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_bar(aes(y = AlquilerDifMun, fill = "Alquiler"), stat = "identity", position = "dodge") +
  geom_bar(aes(y = OtroDifMun, fill = "Otro"), stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("Propiedad" = "skyblue2", "Alquiler" = "green3", "Otro" = "red2")) +
  labs(title = "Cambios en Propiedad, Alquiler y Otro por Municipio",
       x = "Municipio",
       y = "Cambios",
       fill = "Tipo de Cambio") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = guide_legend(title = "Tipo de Cambio")) +
  coord_flip()+
  theme_minimal()

Maps

In 2011

Per province

library(mapSpain)
  
esp_can <- esp_get_country() 
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box() 
munic <- esp_get_munic() 

provic <- esp_get_prov() |> 
  mutate(CPRO = cpro)

# Ownership
mapa <- propiedad |> 
  select(CPRO, Propiedad11Prov) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p1 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Propiedad11Prov), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
geom_sf_text(data = mapa, aes(label = round(Propiedad11Prov, 1), color = ifelse(Propiedad11Prov > 75, "white", "black")), 
             size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "% of Young People in 'Property' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
        plot.background = element_rect(fill = "white", color = NA),  
    panel.background = element_rect(fill = "white", color = NA) 
  ) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1) +
  scale_color_identity()  

# Inheritance or donation

mapa <- propiedad |> 
  select(CPRO, Herencia11Prov) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p2 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Herencia11Prov), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Herencia11Prov, 1)), 
               color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "% of Young People in 'Inheritance/Donation' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
        plot.background = element_rect(fill = "white", color = NA), 
    panel.background = element_rect(fill = "white", color = NA) 
  ) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1)

# Rent
mapa <- propiedad |> 
  select(CPRO, Alquiler11Prov) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p3 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Alquiler11Prov), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Alquiler11Prov, 1)), 
               color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03,  family = "Montserrat") + 
  theme_void() +  
  labs(title = "% of Young People in 'Rent' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
    plot.background = element_rect(fill = "white", color = NA), 
    panel.background = element_rect(fill = "white", color = NA) 
) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1)

# Other
mapa <- propiedad |> 
  select(CPRO, Otro11Prov) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p4 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Otro11Prov), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Otro11Prov, 1)), 
               color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "% of Young People in 'Other' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Times New Roman", size = 14),
    text = element_text(family = "Times New Roman"),
    plot.background = element_rect(fill = "white", color = NA), 
    panel.background = element_rect(fill = "white", color = NA)
  ) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1)

# Save as image together
p1 <- p1 + theme(plot.margin = margin(20, 0, 5, 0))
p2 <- p2 + theme(plot.margin = margin(20, 0, 5, 0))
p3 <- p3 + theme(plot.margin = margin(5, 0, 5, 0))
p4 <- p4 + theme(plot.margin = margin(5, 0, 5, 0))

# Add individual titles to each plot
p1 <- p1 + labs(title = "Ownership") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p2 <- p2 + labs(title = "Inheritance", family = "Times New roman") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p3 <- p3 + labs(title = "Rent") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p4 <- p4 + labs(title = "Other") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))

# Add a general title
combined_plot <- ggarrange(p1, p2, p3, p4, ncol = 2, nrow = 2, common.legend = FALSE) +
  ggtitle("% of Young people per Tenure Regime in 2011") +
  theme(plot.title = element_text(hjust = 0.5, size = 18, family = "Times New Roman", face = "bold"))  # Centramos el título general
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
combined_plot <- annotate_figure(combined_plot, bottom = text_grob("Source: Self-elaborated from Census data", hjust = 0, x = 0, size = 14, color = "#555555", family = "Times New Roman"))
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
# Save as image
#ggsave("2011.png", plot = combined_plot, width = 12, height = 8, bg = "white")

combined_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Per CCAA

library(mapSpain)
  
esp_can <- esp_get_country() 
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box() 
munic <- esp_get_munic() 
ccaa <- esp_get_ccaa()

ccaa <- ccaa |> 
  mutate(CCAA = ine.ccaa.name)

ccaa <- ccaa |>
  mutate(CCAA = ine.ccaa.name) |>
  mutate(CCAA = case_when(
    CCAA == "Asturias, Principado de" ~ "Asturias",
    CCAA == "Balears, Illes" ~ "Islas Baleares",
    CCAA == "Castilla - La Mancha" ~ "Castilla-La Mancha",
    CCAA == "Comunitat Valenciana" ~ "Comunidad Valenciana",
    CCAA == "Madrid, Comunidad de" ~ "Comunidad de Madrid",
    CCAA == "Murcia, Región de" ~ "Comunidad de Murcia",
    CCAA == "Navarra, Comunidad Foral de" ~ "Comunidad Foral de Navarra",
    CCAA == "Rioja, La" ~ "La Rioja",
    TRUE ~ CCAA
  ))

# Ownership
mapa <- propiedad |> 
  select(CCAA, Propiedad11CCAA) |> 
  distinct(CCAA, .keep_all = TRUE)

mapa <- ccaa |> 
  left_join(mapa, by = "CCAA")

p1 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Propiedad11CCAA), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Propiedad11CCAA, 1), color = ifelse(Propiedad11CCAA > 75, "white", "black")), 
               size = 2.4, check_overlap = TRUE, nudge_y = -0.03) + 
  theme_void() +  
  labs(title = "% of Young People in 'Property' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
        plot.background = element_rect(fill = "white", color = NA),  # Fondo blanco para la figura
    panel.background = element_rect(fill = "white", color = NA)  # Fondo blanco para el panel
  ) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1) +
  scale_color_identity()  

# Herencia o donación
mapa <- propiedad |> 
  select(CCAA, Herencia11CCAA) |> 
  distinct(CCAA, .keep_all = TRUE)

mapa <- ccaa |> 
  left_join(mapa, by = "CCAA")

p2 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Herencia11CCAA), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Herencia11CCAA, 1)), 
               color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) + 
  theme_void() +  
  labs(title = "% of Young People in 'Inheritance/Donation' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
        plot.background = element_rect(fill = "white", color = NA),  
    panel.background = element_rect(fill = "white", color = NA)  
  ) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1)

# Rent
mapa <- propiedad |> 
  select(CCAA, Alquiler11CCAA) |> 
  distinct(CCAA, .keep_all = TRUE)

mapa <- ccaa |> 
  left_join(mapa, by = "CCAA")

p3 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Alquiler11CCAA), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Alquiler11CCAA, 1)), 
               color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) + 
  theme_void() +  
  labs(title = "% of Young People in 'Rent' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
    plot.background = element_rect(fill = "white", color = NA), 
    panel.background = element_rect(fill = "white", color = NA)
) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1)

# Other
mapa <- propiedad |> 
  select(CCAA, Otro11CCAA) |> 
  distinct(CCAA, .keep_all = TRUE)

mapa <- ccaa |> 
  left_join(mapa, by = "CCAA")

p4 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Otro11CCAA), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Otro11CCAA, 1)), 
               color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) + 
  theme_void() +  
  labs(title = "% of Young People in 'Other' Residence Status in 2011") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
    plot.background = element_rect(fill = "white", color = NA), 
    panel.background = element_rect(fill = "white", color = NA) 
  ) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1)

p1 <- p1 + theme(plot.margin = margin(5, 5, 5, 5))
p2 <- p2 + theme(plot.margin = margin(5, 5, 5, 5))
p3 <- p3 + theme(plot.margin = margin(5, 5, 5, 5))
p4 <- p4 + theme(plot.margin = margin(5, 5, 5, 5))

combined_plot <- ggarrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

In 2021

Per province

library(mapSpain)

esp_can <- esp_get_country() 
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box() 
munic <- esp_get_munic() 

provic <- esp_get_prov() |> 
  mutate(CPRO = cpro)

mapa <- propiedad |> 
  select(CPRO, Propiedad21Prov) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

# Ownership
p5 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Propiedad21Prov), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Propiedad21Prov, 1), color = ifelse(Propiedad21Prov > 75, "white", "black")), 
               size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "% of Young People in 'Property' Residence Status in 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri"),
    plot.background = element_rect(fill = "white", color = NA), 
    panel.background = element_rect(fill = "white", color = NA) 
  ) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1) +
  scale_color_identity()  
 
# Rent
mapa <- propiedad |> 
  select(CPRO, Alquiler21Prov) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p6 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Alquiler21Prov), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Alquiler21Prov, 1), 
                                color = ifelse(Alquiler21Prov > 38, "white", "black")), 
               size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "% of Young People in 'Rent' Residence Status in 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri")) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1) +
  scale_color_identity()  

# Other
mapa <- propiedad |> 
  select(CPRO, Otro21Prov) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p7 <-ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = Otro21Prov), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(Otro21Prov, 1), color = ifelse(Otro21Prov > 19, "white", "black")), size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "% of Young People in 'Other' Residence Status in 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri")) + 
  coord_sf() +
  scale_fill_viridis_c(option = "C", direction = -1) +
  scale_color_identity()  

# Save as image together
p5 <- p5 + theme(plot.margin = margin(20, 0, 5, 0))
p6 <- p6 + theme(plot.margin = margin(20, 0, 5, 0))
p7 <- p7 + theme(plot.margin = margin(5, 0, 5, 0))

# Add individual titles
p5 <- p5 + labs(title = "Ownership") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p6 <- p6 + labs(title = "Rent", family = "Times New roman") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p7 <- p7 + labs(title = "Other") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))

# Add general title
combined_plot <- ggarrange(p5, p6, p7, ncol = 2, nrow = 2, common.legend = FALSE) +
  ggtitle("% of Young people per Tenure Regime in 2021") +
  theme(plot.title = element_text(hjust = 0.5, size = 18, family = "Times New Roman", face = "bold"))  
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
combined_plot <- annotate_figure(combined_plot, bottom = text_grob("Source: Self-elaborated from Census data", hjust = 0, x = 0, size = 14, color = "#555555", family = "Times New Roman"))

# Save as image
#ggsave("2021.png", plot = combined_plot, width = 12, height = 8, bg = "white")

combined_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Difference

Per municipality

library(mapSpain)
  
esp_can <- esp_get_country() 
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box() 
munic <- esp_get_munic() 

munic <- munic |>  
  mutate(cusec = LAU_CODE) |> 
  select(geometry, cusec)

# Ownership
mapa <- propiedad |> 
  select(cusec, PropiedadDifMun)
  
mapa <- munic |> 
  left_join(mapa, by = "cusec")

mapa <- mapa |> filter(!is.na(PropiedadDifMun)) 

pa <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = PropiedadDifMun), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  theme_void() +  
  labs(title = "Difference of % of Young People in 'Property' Residence Status in 2011 to 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri")
  ) + 
  coord_sf() +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)

# Rent
mapa <- propiedad |> 
  select(cusec, AlquilerDifMun)
  
mapa <- munic |> 
  left_join(mapa, by = "cusec")
  
mapa <- mapa |> filter(!is.na(AlquilerDifMun))

pb <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = AlquilerDifMun), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  theme_void() +  
  labs(title = "Difference of % of Young People in 'Rent' Residence Status in 2011 to 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri")
  ) + 
  coord_sf() +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)

ggarrange(pa, pb, ncol = 2, nrow = 1)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Per province

library(mapSpain)

esp_can <- esp_get_country() 
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box() 
munic <- esp_get_munic() 

provic <- esp_get_prov() |> 
  mutate(CPRO = cpro)

# Propiedad
mapa <- propiedad |> 
  select(CPRO, PropiedadDifProv) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

  p8 <- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = PropiedadDifProv), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(PropiedadDifProv, 1)), 
               color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) + 
  theme_void() +  
  labs(title = "Difference of % of Young People in 'Property' Residence Status in 2011 to 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri")
  ) + 
  coord_sf() +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)

# Rent
mapa <- propiedad |> 
  select(CPRO, AlquilerDifProv) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p9<- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = AlquilerDifProv), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(AlquilerDifProv, 1)), color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "Difference of % of Young People in 'Rent' Residence Status in 2011 to 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri")) + 
  coord_sf() +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)

# Other
mapa <- propiedad |> 
  select(CPRO, OtroDifProv) |> 
  distinct(CPRO, .keep_all = TRUE)

mapa <- provic |> 
  left_join(mapa, by = "CPRO")

p10<- ggplot(esp_can) +
  geom_sf(color = "black", size = 0.7) +  
  geom_sf(data = can_prov) +
  geom_sf(data = can_box) + 
  geom_sf(data = mapa, aes(fill = OtroDifProv), color = "black", size = 0.3) +  
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +  
  geom_sf_text(data = mapa, aes(label = round(OtroDifProv, 1)), color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") + 
  theme_void() +  
  labs(title = "Difference of % of Young People in 'Rent' Residence Status in 2011 to 2021") +  
  theme(
    legend.position = "none",
    plot.title = element_text(family = "Calibri", size = 14),
    text = element_text(family = "Calibri")) + 
  coord_sf() +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)

# Save as image together
p8 <- p8 + theme(plot.margin = margin(10, 0, 0, -30))
p9 <- p9 + theme(plot.margin = margin(0, 0, 0, -30))

# Add individual titles
p8 <- p8 + labs(title = "Ownership") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p9 <- p9 + labs(title = "Rent", family = "Times New roman") +
  theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))

# Add title
combined_plot <- ggarrange(p8, p9, ncol = 1, nrow = 2, common.legend = FALSE) +
  ggtitle("Difference of % of Young people per Tenure Regime in 2011 to 2021") +
  theme(plot.title = element_text(hjust = 0.5, size = 14, family = "Times New Roman", face = "bold"))  # Centramos el título general
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
combined_plot <- annotate_figure(combined_plot, bottom = text_grob("Source: Self-elaborated from Census data", hjust = 0.5, x = 0.5, size = 12, color = "#555555", family = "Times New Roman"))

# Save as image
#ggsave("difpropiedadalquiler.png", plot = combined_plot, width = 12, height = 8, bg = "white")

combined_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

# - # Multilevel model

# Check we have the data
municipios <- read_xlsx("municipios.xlsx")

numeric_data <- municipios %>%
  ungroup() |> 
  select_if(is.numeric) 

# Scale
numeric_data <- scale(numeric_data)
numeric_data <- as.data.frame(numeric_data)

#Add CPRO and CCAA columns
numeric_data <- numeric_data %>%
  mutate(id = row_number())

cpros <- municipios |> 
  select(cusec, CPRO, CCAA) |> 
  mutate(id = row_number())

numeric_data <- numeric_data |> 
  left_join(cpros, by = "id") |> 
  select(-id)

# Select variables
independientes <- numeric_data |> 
  select(CCAA, CPRO, cusec, PropiedadDifMun, ipvGeneralDif, PorcentajeJovenDif, VaciasDif, SecundariasDif, PorcentajeEspañolaDif, PorcentajeEuropeaDif, PorcentajeNoEuropeaDif, DifPrecioHipotecaProv, ParoJuvenilDif, ProtegidaDif, RentaMedia2021, PorcentajeTurísticas2021, PrecioAlquilerM2, Herencia11Mun) |> 

  # Feature engineering
    mutate(PrecioAlquilerM22=PrecioAlquilerM2^2,
         RentaMedia2021_2 = RentaMedia2021^2,
         PorcentajeTurísticas2021_2 = PorcentajeTurísticas2021^2,
         DifPrecioHipotecaProv2 = DifPrecioHipotecaProv^2,
         Herencia11Mun2 = (Herencia11Mun)^2,
           VaciasDif2 = VaciasDif^2,
           RentaMedia2021_exp = exp(RentaMedia2021),
         PorcentajeEspañolaDif2 = PorcentajeEspañolaDif^2,
        PorcentajeEuropeaDif2 = PorcentajeEuropeaDif^2,
         PorcentajeNoEuropeaDif2 = PorcentajeNoEuropeaDif^2,
    SecundariasDif2 = (SecundariasDif)^2)

variables <- independientes |> 
  select(CCAA, CPRO, PropiedadDifMun, Herencia11Mun, PorcentajeJovenDif, VaciasDif, SecundariasDif, PorcentajeNoEuropeaDif, DifPrecioHipotecaProv, ParoJuvenilDif, ProtegidaDif, PorcentajeTurísticas2021, PrecioAlquilerM2, RentaMedia2021_2, PrecioAlquilerM22, DifPrecioHipotecaProv2, Herencia11Mun2, PorcentajeEspañolaDif2, PorcentajeEuropeaDif2, RentaMedia2021, PorcentajeTurísticas2021_2)

The model itself

modelo_multi <- lmer(PropiedadDifMun ~ PorcentajeJovenDif + PorcentajeEspañolaDif2 + PorcentajeEspañolaDif2 +  PorcentajeNoEuropeaDif + RentaMedia2021 + VaciasDif + PorcentajeTurísticas2021 + PorcentajeTurísticas2021_2 + PrecioAlquilerM2 +  + PrecioAlquilerM22 + Herencia11Mun +  Herencia11Mun2 + (1|CPRO) + (1|CCAA), data = variables)

summary(modelo_multi)
## Linear mixed model fit by REML ['lmerMod']
## Formula: PropiedadDifMun ~ PorcentajeJovenDif + PorcentajeEspañolaDif2 +  
##     PorcentajeEspañolaDif2 + PorcentajeNoEuropeaDif + RentaMedia2021 +  
##     VaciasDif + PorcentajeTurísticas2021 + PorcentajeTurísticas2021_2 +  
##     PrecioAlquilerM2 + +PrecioAlquilerM22 + Herencia11Mun + Herencia11Mun2 +  
##     (1 | CPRO) + (1 | CCAA)
##    Data: variables
## 
## REML criterion at convergence: 1408
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3090 -0.6050 -0.0046  0.6125  4.1847 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  CPRO     (Intercept) 0.11625  0.3410  
##  CCAA     (Intercept) 0.04024  0.2006  
##  Residual             0.53533  0.7317  
## Number of obs: 584, groups:  CPRO, 52; CCAA, 19
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                -0.125118   0.090270  -1.386
## PorcentajeJovenDif          0.154497   0.039257   3.936
## PorcentajeEspañolaDif2      0.098858   0.019585   5.048
## PorcentajeNoEuropeaDif     -0.287264   0.054263  -5.294
## RentaMedia2021              0.117529   0.048264   2.435
## VaciasDif                   0.133401   0.043981   3.033
## PorcentajeTurísticas2021    0.233317   0.070700   3.300
## PorcentajeTurísticas2021_2 -0.032171   0.010165  -3.165
## PrecioAlquilerM2           -0.326464   0.071356  -4.575
## PrecioAlquilerM22           0.167857   0.029070   5.774
## Herencia11Mun               0.126894   0.063379   2.002
## Herencia11Mun2             -0.054791   0.008316  -6.589
## Warning in abbreviate(rn, minlength = 11): abreviatura utilizada con caracteres
## no ASCII
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abreviatura utilizada con caracteres
## no ASCII
## Warning in abbreviate(rn, minlength = 6): abreviatura utilizada con caracteres
## no ASCII
##             (Intr) PrcnJD PrEñD2 PrcNED RM2021 VacsDf PrT2021 PT2021_ PrcAM2
## PrcntjJvnDf -0.067                                                          
## PrcntjEsñD2 -0.191  0.120                                                   
## PrcntjNErpD  0.143 -0.367 -0.613                                            
## RentaMd2021  0.054 -0.115 -0.024  0.211                                     
## VaciasDif    0.049 -0.192 -0.023  0.215  0.188                              
## PrcntjT2021  0.053  0.033 -0.064 -0.019 -0.002 -0.048                       
## PrcnT2021_2 -0.082 -0.062 -0.055  0.106  0.036  0.003 -0.820                
## PrecAlqlrM2  0.236 -0.066  0.039 -0.056 -0.316  0.111 -0.257   0.110        
## PrcAlqlrM22 -0.350 -0.029 -0.051  0.041 -0.043 -0.180  0.125  -0.041  -0.627
## Herenci11Mn  0.151 -0.180  0.010  0.067  0.084 -0.244 -0.273   0.166   0.339
## Herenc11Mn2 -0.138  0.169  0.007 -0.099 -0.088  0.046  0.179  -0.082  -0.184
##             PrAM22 Hrn11M
## PrcntjJvnDf              
## PrcntjEsñD2              
## PrcntjNErpD              
## RentaMd2021              
## VaciasDif                
## PrcntjT2021              
## PrcnT2021_2              
## PrecAlqlrM2              
## PrcAlqlrM22              
## Herenci11Mn -0.240       
## Herenc11Mn2  0.128 -0.769
# Test multicolinearity
vif(modelo_multi)
##         PorcentajeJovenDif     PorcentajeEspañolaDif2 
##                   1.277609                   1.725807 
##     PorcentajeNoEuropeaDif             RentaMedia2021 
##                   2.053334                   1.478132 
##                  VaciasDif   PorcentajeTurísticas2021 
##                   1.517233                   3.657516 
## PorcentajeTurísticas2021_2           PrecioAlquilerM2 
##                   3.356258                   2.551298 
##          PrecioAlquilerM22              Herencia11Mun 
##                   1.929337                   3.661844 
##             Herencia11Mun2 
##                   2.719771
# Get table

tab_model(modelo_multi, show.ci = F, show.se = T, pred.labels = c("(Intercept)", "Young Population Diff", "Spanish Population Diff ^2", "European Population Diff ^2", "Non-European Population Diff", "Average Income 2021", "Empty Dwellings Diff", "Tourist Dwellings 2021", "Tourist Dwellings 2021", "M2 Rental Price 2021", "M2 Rental Price 2021 ^2", "Inheritance 2011", "Inheritance 2011 ^2"))
## Length of `pred.labels` does not equal number of predictors, no labelling applied.
  Propiedad Dif Mun
Predictors Estimates std. Error p
(Intercept) -0.13 0.09 0.166
PorcentajeJovenDif 0.15 0.04 <0.001
PorcentajeEspañolaDif2 0.10 0.02 <0.001
PorcentajeNoEuropeaDif -0.29 0.05 <0.001
RentaMedia2021 0.12 0.05 0.015
VaciasDif 0.13 0.04 0.003
PorcentajeTurísticas2021 0.23 0.07 0.001
PorcentajeTurísticas2021_2 -0.03 0.01 0.002
PrecioAlquilerM2 -0.33 0.07 <0.001
PrecioAlquilerM22 0.17 0.03 <0.001
Herencia11Mun 0.13 0.06 0.046
Herencia11Mun2 -0.05 0.01 <0.001
Random Effects
σ2 0.54
τ00 CPRO 0.12
τ00 CCAA 0.04
ICC 0.23
N CPRO 52
N CCAA 19
Observations 584
Marginal R2 / Conditional R2 0.268 / 0.433
# Table of random effects intercepts
ranef(modelo_multi)
## $CPRO
##      (Intercept)
## 01 -0.3394201888
## 02 -0.0539252098
## 03 -0.1102050355
## 04  0.0954522909
## 05 -0.0030815013
## 06 -0.2778106851
## 07  0.0571203794
## 08 -0.4568979505
## 09  0.1673043619
## 10  0.2786687013
## 11  0.0493371339
## 12  0.2945304340
## 13 -0.2341137245
## 14 -0.5279151647
## 15 -0.3523170103
## 16  0.2185750495
## 17  0.0978208873
## 18  0.1330674041
## 19 -0.0436789962
## 20  0.1694214666
## 21 -0.0239980750
## 22  0.1978326981
## 23  0.3980492004
## 24 -0.3650156094
## 25 -0.1456538803
## 26  0.3115981280
## 27  0.3179673614
## 28 -0.0600658908
## 29  0.6199853744
## 30  0.3768622041
## 31 -0.0761234298
## 32  0.1305461700
## 33  0.0258548810
## 34 -0.1110030877
## 35 -0.3423124471
## 36 -0.0178135566
## 37  0.2895817566
## 38 -0.2331998622
## 39 -0.2960146211
## 40  0.1918394903
## 41  0.0721591718
## 42 -0.0523262097
## 43  0.0731430582
## 44  0.1325878437
## 45  0.1361283070
## 46 -0.5842958011
## 47 -0.1406036110
## 48 -0.0973106305
## 49  0.0001320626
## 50 -0.2655381377
## 51  0.2133976247
## 52  0.1616768755
## 
## $CCAA
##                              (Intercept)
## Andalucía                   0.2824937152
## Aragón                      0.0224580724
## Asturias                    0.0089492798
## Canarias                   -0.1992049662
## Cantabria                  -0.1024610276
## Castilla-La Mancha          0.0079560609
## Castilla y León            -0.0080207611
## Cataluña                   -0.1493876825
## Ceuta                       0.0738643917
## Comunidad de Madrid        -0.0207909085
## Comunidad de Murcia         0.1304452076
## Comunidad Foral de Navarra -0.0263489851
## Comunidad Valenciana       -0.1384437644
## Extremadura                 0.0002969894
## Galicia                     0.0271310892
## Islas Baleares              0.0197713638
## La Rioja                    0.1078550251
## Melilla                     0.0559620290
## País Vasco                 -0.0925251288
## 
## with conditional variances for "CPRO" "CCAA"
random_effects <- ranef(modelo_multi)

random_effects_df <- as.data.frame(random_effects)

# Add labels
locations <- read_excel("vivienda turística/exp_viv_turistica_tabla5_FEB2021.xlsx", sheet = 3, na = "Dato protegido por secreto estadístico.")

nombres_provincias <- locations |> 
  select(PROV_LITERAL, PROV) |> 
  rename(CPRO = PROV) |> 
  distinct(CPRO, .keep_all = TRUE)

nombres <- nombres_provincias$PROV_LITERAL
rownames(random_effects_df)[1:52] <- nombres[1:52]

#ccaa
ccaa <- unique(municipios$CCAA)

ccaa <- as.data.frame(ccaa)

ccaa <- ccaa %>%
  mutate(ccaa = if_else(ccaa %in% nombres, paste0(ccaa, " (CCAA)"), ccaa))

# Final RF table
tab_df(random_effects_df, 
          title = "Random Effects Estimates", 
          show.rownames = TRUE) 
Random Effects Estimates
Row grpvar term grp condval condsd
Araba/Álava CPRO (Intercept) 01 -0.34 0.26
Albacete CPRO (Intercept) 02 -0.05 0.22
Alicante/Alacant CPRO (Intercept) 03 -0.11 0.18
Almería CPRO (Intercept) 04 0.10 0.21
Ávila CPRO (Intercept) 05 -0.00 0.26
Badajoz CPRO (Intercept) 06 -0.28 0.23
Balears, Illes CPRO (Intercept) 07 0.06 0.21
Barcelona CPRO (Intercept) 08 -0.46 0.16
Burgos CPRO (Intercept) 09 0.17 0.24
Cáceres CPRO (Intercept) 10 0.28 0.24
Cádiz CPRO (Intercept) 11 0.05 0.18
Castellón/Castelló CPRO (Intercept) 12 0.29 0.21
Ciudad Real CPRO (Intercept) 13 -0.23 0.22
Córdoba CPRO (Intercept) 14 -0.53 0.20
Coruña, A CPRO (Intercept) 15 -0.35 0.20
Cuenca CPRO (Intercept) 16 0.22 0.25
Girona CPRO (Intercept) 17 0.10 0.21
Granada CPRO (Intercept) 18 0.13 0.20
Guadalajara CPRO (Intercept) 19 -0.04 0.24
Gipuzkoa CPRO (Intercept) 20 0.17 0.22
Huelva CPRO (Intercept) 21 -0.02 0.21
Huesca CPRO (Intercept) 22 0.20 0.25
Jaén CPRO (Intercept) 23 0.40 0.21
León CPRO (Intercept) 24 -0.37 0.23
Lleida CPRO (Intercept) 25 -0.15 0.25
Rioja, La CPRO (Intercept) 26 0.31 0.25
Lugo CPRO (Intercept) 27 0.32 0.25
Madrid CPRO (Intercept) 28 -0.06 0.19
Málaga CPRO (Intercept) 29 0.62 0.17
Murcia CPRO (Intercept) 30 0.38 0.20
Navarra CPRO (Intercept) 31 -0.08 0.25
Ourense CPRO (Intercept) 32 0.13 0.25
Asturias CPRO (Intercept) 33 0.03 0.22
Palencia CPRO (Intercept) 34 -0.11 0.26
Palmas, Las CPRO (Intercept) 35 -0.34 0.20
Pontevedra CPRO (Intercept) 36 -0.02 0.21
Salamanca CPRO (Intercept) 37 0.29 0.24
Santa Cruz de Tenerife CPRO (Intercept) 38 -0.23 0.20
Cantabria CPRO (Intercept) 39 -0.30 0.23
Segovia CPRO (Intercept) 40 0.19 0.26
Sevilla CPRO (Intercept) 41 0.07 0.17
Soria CPRO (Intercept) 42 -0.05 0.26
Tarragona CPRO (Intercept) 43 0.07 0.20
Teruel CPRO (Intercept) 44 0.13 0.25
Toledo CPRO (Intercept) 45 0.14 0.23
Valencia/València CPRO (Intercept) 46 -0.58 0.17
Valladolid CPRO (Intercept) 47 -0.14 0.24
Bizkaia CPRO (Intercept) 48 -0.10 0.20
Zamora CPRO (Intercept) 49 0.00 0.24
Zaragoza CPRO (Intercept) 50 -0.27 0.25
Ceuta CPRO (Intercept) 51 0.21 0.31
Melilla CPRO (Intercept) 52 0.16 0.31
53 CCAA (Intercept) Andalucía 0.28 0.11
54 CCAA (Intercept) Aragón 0.02 0.16
55 CCAA (Intercept) Asturias 0.01 0.18
56 CCAA (Intercept) Canarias -0.20 0.16
57 CCAA (Intercept) Cantabria -0.10 0.18
58 CCAA (Intercept) Castilla-La Mancha 0.01 0.14
59 CCAA (Intercept) Castilla y León -0.01 0.12
60 CCAA (Intercept) Cataluña -0.15 0.14
61 CCAA (Intercept) Ceuta 0.07 0.19
62 CCAA (Intercept) Comunidad de Madrid -0.02 0.18
63 CCAA (Intercept) Comunidad de Murcia 0.13 0.18
64 CCAA (Intercept) Comunidad Foral de Navarra -0.03 0.18
65 CCAA (Intercept) Comunidad Valenciana -0.14 0.15
66 CCAA (Intercept) Extremadura 0.00 0.17
67 CCAA (Intercept) Galicia 0.03 0.15
68 CCAA (Intercept) Islas Baleares 0.02 0.18
69 CCAA (Intercept) La Rioja 0.11 0.18
70 CCAA (Intercept) Melilla 0.06 0.19
71 CCAA (Intercept) País Vasco -0.09 0.16

Normality

Heterocedasticity was tested on a linear model just like the final multilevel model but without the random effects. There was a little heterocedasticity but residuals didn’t distribute in clear patterns.

Regarding normality, the Shapiro-Wilk test gives a value less than 0.05, which tells us that the data do not follow a normal distribution.

However, in the QQ plot, we see that the data mostly fit the line perfectly, but at the beginning and the end, they deviate symmetrically: at the beginning, the values that separate from the line go down, and at the end, they go up. It appears to follow a normal or ‘light-tailed’ distribution. This is related to having fewer extreme values than a normal distribution.

#qq plot
residuos <- resid(modelo_multi)

qqnorm(residuos)
qqline(residuos)

shapiro.test(residuos)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuos
## W = 0.99321, p-value = 0.009694

Independent vs Dependent Variable Relationships Plots

independientes <- independientes %>%
  select_if(is.numeric)

# Get names of independent variables
variables_independientes <- setdiff(names(independientes), "PropiedadDifMun")

# Function to get plots & trasnform variables
generar_grafico <- function(variable) {
  # Crear transformaciones
  independientes[[paste0(variable, "2")]] <- independientes[[variable]]^2
  independientes[[paste0(variable, "_sqrt")]] <- sqrt(independientes[[variable]])
  independientes[[paste0(variable, "_log")]] <- log(independientes[[variable]] + 1) 
  
  # Lineal models
  modelo_original <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable)])
  modelo_cuadrado <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable, paste0(variable, "2"))])
  modelo_sqrt <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable, paste0(variable, "_sqrt"))])
  modelo_log <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable, paste0(variable, "_log"))])

  # Create value ranges
  rango_variable <- seq(min(independientes[[variable]], na.rm = TRUE), max(independientes[[variable]], na.rm = TRUE), length.out = 100)
  newdata <- data.frame(variable = rango_variable)
  colnames(newdata) <- variable
  newdata[[paste0(variable, "2")]] <- rango_variable^2
  newdata[[paste0(variable, "_sqrt")]] <- sqrt(rango_variable)
  newdata[[paste0(variable, "_log")]] <- log(rango_variable + 1)
  
  # Predictions
  pred_original <- predict(modelo_original, newdata = newdata)
  pred_cuadrado <- predict(modelo_cuadrado, newdata = newdata)
  pred_sqrt <- predict(modelo_sqrt, newdata = newdata)
  pred_log <- predict(modelo_log, newdata = newdata)

  # Plot
  plot(independientes[[variable]], independientes$PropiedadDifMun, 
       xlab = variable, ylab = "PropiedadDifMun", main = paste("Relación entre", variable, "y PropiedadDifMun"),
       col = "blue", pch = 16)
  lines(rango_variable, pred_original, col = "red", lwd = 2)
  lines(rango_variable, pred_cuadrado, col = "green", lwd = 2, lty = 2)
  lines(rango_variable, pred_sqrt, col = "purple", lwd = 2, lty = 3)
  lines(rango_variable, pred_log, col = "orange", lwd = 2, lty = 4)
  legend("topright", legend = c("Datos", "Original", "Cuadrado", "Raíz Cuadrada", "Logaritmo"),
         col = c("blue", "red", "green", "purple", "orange"), pch = c(16, NA, NA, NA, NA), 
         lwd = c(NA, 2, 2, 2, 2), lty = c(NA, 1, 2, 3, 4))
}

# Create variables per each independent variable
for (variable in variables_independientes) {
  generar_grafico(variable)
}
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs

## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs

Variable distributions

variables_independientes <- c("ipv2021General", "PorcentajeJovenDif", "VaciasDif", "SecundariasDif", "DifPrecioHipotecaProv", "ParoJuvenilDif", "ProtegidaDif", "RentaMedia2021", "PorcentajeTurísticas2021", "PrecioAlquilerM2", "Herencia11Mun")

par(mfrow = c(4, 3))  

independientes <- independientes %>%
  select_if(is.numeric)

# Create histograms per each variable
for (variable in variables_independientes) {
  if (is.numeric(independientes[[variable]])) {
    hist(independientes[[variable]], main = paste("Histograma de", variable),
         xlab = variable, col = "skyblue", border = "white")
  } else {
    warning(paste("Variable", variable, "is not numeric and will be skipped."))
  }
}
## Warning: Variable ipv2021General is not numeric and will be skipped.

-

Principal component analysis

I keep all variables included in the multilevel model and the variables of the Difference in all tenure regimes.

municipios <- read_xlsx("municipios.xlsx")

municipios <- municipios |> 
    mutate(MUN_LITERAL = ifelse(grepl("\\d", MUN_LITERAL), paste(MUN_LITERAL, PROV_LITERAL), MUN_LITERAL))

# Keep province name in the rural municipalities names
names = municipios$MUN_LITERAL
prov = municipios$PROV_LITERAL
ccaa = municipios$CCAA

numeric_data <- municipios %>%
  ungroup() |> 
  select_if(is.numeric) 

numeric_data <- scale(numeric_data)
numeric_data <- as.data.frame(numeric_data)

# Select variables from multilevel model
selected_vars <- numeric_data %>% 
  select(PropiedadDifMun, AlquilerDifMun, OtroDifMun, Herencia11Mun, PorcentajeJovenDif, VaciasDif, SecundariasDif, PorcentajeNoEuropeaDif, DifPrecioHipotecaProv, ParoJuvenilDif, ProtegidaDif, PorcentajeTurísticas2021, PrecioAlquilerM2, RentaMedia2021)

# Creating the components
set.seed(123)
pca = prcomp(selected_vars, scale=T)
summary(pca)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.7991 1.4413 1.3021 1.2367 1.12508 0.96951 0.89003
## Proportion of Variance 0.2312 0.1484 0.1211 0.1092 0.09041 0.06714 0.05658
## Cumulative Proportion  0.2312 0.3796 0.5007 0.6099 0.70035 0.76748 0.82407
##                            PC8     PC9    PC10    PC11    PC12    PC13
## Standard deviation     0.77704 0.73491 0.63617 0.61180 0.58372 0.44658
## Proportion of Variance 0.04313 0.03858 0.02891 0.02674 0.02434 0.01425
## Cumulative Proportion  0.86720 0.90577 0.93468 0.96142 0.98575 1.00000
##                             PC14
## Standard deviation     0.0004835
## Proportion of Variance 0.0000000
## Cumulative Proportion  1.0000000
# Plot of the variability explained by components
fviz_screeplot(pca, addlabels = TRUE)

Component 1

# Contribution of variables to component 1
fviz_contrib(pca, choice = "var", axes = 1)

# Plot adjustments
n <- 10
col_pos <- colorRampPalette(c("#CCE6CC", "#208909"))(n)
col_neg <- colorRampPalette(c("#FFE6E6", "#B30000"))(n)
coefs <- pca$rotation[,1]
valor_max <- max(coefs)
valor_min <- min(coefs)
coefs_ajustados <- coefs - valor_min + 1
colores <- ifelse(coefs >= 0, col_pos[ceiling((coefs_ajustados / (valor_max - valor_min + 1)) * n)], 
                  col_neg[ceiling((abs(coefs_ajustados) / (valor_max - valor_min + 1)) * n)])

# Plot
barplot(coefs, las = 2, col = colores, cex.names = 0.8)

#Ranking of municipalities of this component
names[order(pca$x[,1], decreasing=T)][1:5]
## [1] "<=2000 Lugo"              "<=2000 Ourense"          
## [3] "2001 <= 5000 Lugo"        "2001 <= 5000 Palmas, Las"
## [5] "<=2000 Ciudad Real"
names[order(pca$x[,1])][1:5]
## [1] "Alcobendas"                 "Pozuelo de Alarcón"        
## [3] "Boadilla del Monte"         "San Sebastián de los Reyes"
## [5] "Rozas de Madrid, Las"

Component 2

# Contribution of variables to component 1
fviz_contrib(pca, choice = "var", axes = 2)

# Plot adjustments
n <- 10
col_pos <- colorRampPalette(c("#CCE6CC", "#208909"))(n)
col_neg <- colorRampPalette(c("#FFE6E6", "#B30000"))(n)
coefs <- pca$rotation[,2]
valor_max <- max(coefs)
valor_min <- min(coefs)
coefs_ajustados <- coefs - valor_min + 1
colores <- ifelse(coefs >= 0, col_pos[ceiling((coefs_ajustados / (valor_max - valor_min + 1)) * n)], 
                  col_neg[ceiling((abs(coefs_ajustados) / (valor_max - valor_min + 1)) * n)])

# Plot
barplot(coefs, las = 2, col = colores, cex.names = 0.8)

#Ranking of municipalities of this component
names[order(pca$x[,2], decreasing=T)][1:5]
## [1] "Ceuta"                   "Oliva, La"              
## [3] "Sant Josep de sa Talaia" "Mogán"                  
## [5] "Adeje"
names[order(pca$x[,2])][1:5]
## [1] "Torre-Pacheco"       "Mazarrón"            "Águilas"            
## [4] "Lorca"               "2001 <= 5000 Murcia"

Components plot

p <-data.frame(z1 = pca$x[,1], z2 = pca$x[,2]) %>%
  ggplot(aes(z1, z2, label = names, color = ccaa)) +
  geom_point(size = 0) +
  labs(title = "First two principal components (scores)", x = "PC1", y = "PC2") +  theme_bw() +   theme(legend.position = "bottom") +
  geom_text(size = 3, hjust = 0.6, vjust = 0, check_overlap = T)

p

# Versión interactiva
library(plotly)
## 
## Attaching package: 'plotly'
## The following objects are masked from 'package:flextable':
## 
##     highlight, style
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ggplotly(p)

Clusters

How many clusters? Two methods suggest 2, the third one 5.

fviz_nbclust(scale(selected_vars), kmeans, method = 'wss', k.max = 20, nstart = 1000)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations

fviz_nbclust(scale(selected_vars), kmeans, method = 'silhouette', k.max = 20, nstart = 1000)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations

fviz_nbclust(scale(selected_vars), kmeans, method = 'gap_stat', k.max = 10, nstart = 100, nboot = 100)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations

Cluster plot

set.seed(123)
fit <- kmeans(selected_vars, centers = 5, nstart = 100)
fit
## K-means clustering with 5 clusters of sizes 138, 258, 28, 37, 123
## 
## Cluster means:
##   PropiedadDifMun AlquilerDifMun  OtroDifMun Herencia11Mun PorcentajeJovenDif
## 1       0.9685854     -0.8477602 -0.30567771     0.6610622         0.03787514
## 2      -0.2728040      0.1529839  0.22535334    -0.2840823        -0.27206479
## 3      -0.1165844     -0.4909389  0.99902870     1.6815336         0.08372657
## 4      -0.4613462      1.0749195 -0.94344746    -0.2531881         0.34197068
## 5      -0.3491640      0.4186611 -0.07335679    -0.4524255         0.40624948
##    VaciasDif SecundariasDif PorcentajeNoEuropeaDif DifPrecioHipotecaProv
## 1  0.8928064      0.7570246            -0.47491431            -0.4932235
## 2 -0.4309271      0.2337835            -0.01691201            -0.3684490
## 3  1.2959531     -0.3799071            -0.59594063             0.8727930
## 4 -0.2021411     -0.7524592             0.14505481            -0.4309580
## 5 -0.3319965     -1.0268874             0.66033157             1.2571701
##   ParoJuvenilDif ProtegidaDif PorcentajeTurísticas2021 PrecioAlquilerM2
## 1      0.1160849   0.17555398              -0.17474946       -0.7768667
## 2     -0.2308582   0.32614666              -0.18164308       -0.3481197
## 3      1.6124995   0.06949617               3.11462238        0.2697267
## 4      1.7196120  -2.65495182              -0.31535770        1.6145831
## 5     -0.5303571  -0.09825174              -0.03708822        1.0547199
##   RentaMedia2021
## 1    -0.64673846
## 2    -0.03578979
## 3    -0.30194666
## 4    -0.61340852
## 5     1.05393737
## 
## Clustering vector:
##   [1] 4 4 4 4 1 2 2 1 2 2 1 1 1 2 2 2 2 2 2 2 5 3 5 2 2 2 2 2 2 2 2 1 2 2 2 2 2
##  [38] 2 1 2 1 2 2 2 2 2 5 2 5 2 1 1 1 2 1 1 1 2 2 2 2 2 2 3 3 3 5 3 5 5 5 5 5 5
##  [75] 5 5 5 3 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [112] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 2 1 1 2 2 1 1 1 2 1 2 1 1 5 1 2 1 1 2 3
## [149] 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1 2 2 1 2 1 1 1 1 2 2
## [186] 2 2 2 1 2 1 2 1 2 1 2 1 2 2 2 2 2 2 1 2 1 1 2 1 2 2 2 5 5 2 5 5 2 5 2 5 5
## [223] 1 2 2 1 3 2 2 2 1 2 2 1 2 2 2 2 2 4 4 4 4 4 4 4 4 4 4 1 2 1 1 1 2 2 2 2 2
## [260] 1 1 2 2 2 1 1 1 1 1 1 2 2 1 1 1 2 2 1 2 2 2 2 2 2 2 5 1 1 2 2 1 2 1 1 1 2
## [297] 1 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [334] 5 1 1 1 5 1 1 2 5 2 5 1 5 5 5 5 3 5 1 2 5 1 2 2 1 2 1 2 2 2 1 2 2 2 2 2 2
## [371] 2 2 2 2 2 4 4 4 4 4 4 1 1 1 1 1 1 1 2 1 1 2 2 1 1 2 2 1 2 1 2 3 2 3 2 3 2
## [408] 2 2 3 3 2 2 3 2 3 3 2 1 1 1 1 3 1 1 1 2 2 1 1 1 1 2 2 1 2 3 3 3 3 3 3 4 3
## [445] 3 1 3 1 3 1 1 1 2 1 2 2 2 2 2 2 2 1 1 2 2 1 1 2 2 1 2 2 1 1 2 1 1 2 2 2 1
## [482] 2 2 2 2 2 1 2 2 2 1 2 2 2 2 5 2 2 5 2 2 2 2 5 1 1 2 2 2 2 1 1 2 2 2 2 1 2
## [519] 2 2 2 2 2 2 5 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 5 2 2 2 2
## [556] 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 2 2 1 2 1 2 1 2 2 4 5
## 
## Within cluster sum of squares by cluster:
## [1]  968.2069 1740.3895  789.0999  337.2357 1429.2597
##  (between_SS / total_SS =  35.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
selected_vars$cluster <- fit$cluster

centers=fit$centers

barplot(centers[1,], las=2, col="#097969")

barplot(centers[2,], las=2, col="#097969")

p <-fviz_cluster(fit, data = selected_vars, geom = c("point"), ellipse.type = 'norm', pointsize = 1, show.clust.cent = FALSE) +
  theme_minimal() + geom_text(label = names, hjust = 0, vjust = 0, size = 2, check_overlap = TRUE) +
  scale_fill_brewer(palette = "Paired", guide = FALSE)
p
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggsave(filename = "cluster_plot.png", plot = p, width = 8, height = 4, dpi = 300)


library(plotly)
ggplotly(p)

-

Extras

Logit model

This model was tested although not used for the final work.

If I do not balance the classes, 96% of predictions is a decrease and 32% is an increase in property. The accuracy is 85%, which is logical because the classes are unbalanced. We see that the specificity is 96%, meaning it accurately predicts decreases, but the sensitivity is only 0.32, meaning it poorly detects increases.

If the classes are balanced with N=1000, the accuracy drops to 72%, specificity to 71%, but the sensitivity increases to 73%.

# Preparar datos
municipios <- read_xlsx("municipios.xlsx")

municipios <- municipios |> 
    mutate(MUN_LITERAL = ifelse(grepl("\\d", MUN_LITERAL), paste(MUN_LITERAL, PROV_LITERAL), MUN_LITERAL))

numeric_data <- municipios %>%
  ungroup() |> 
  select_if(is.numeric) 

numeric_data <- numeric_data %>%
  mutate(Propiedad = ifelse(PropiedadDifMun >= 0, 1, 0)) %>%
  select(Propiedad, RentaMedia2021, PorcentajeJoven2021, PorcentajeJovenDif, PorcentajeVacias2021, PorcentajeSecundarias2021, PorcentajeEspañola2021, PorcentajeProtegidas2021, ProtegidaDif, PrecioAlquilerM2) 

# Equilibrar clases
prop.table(table(numeric_data$Propiedad)) 
## 
##          0          1 
## 0.90924658 0.09075342
library(ROSE)
## Loaded ROSE 0.0-4
numeric_data <- ovun.sample(Propiedad~., 
                    data = numeric_data, 
                    method = "over", 
                    N = 1000)$data

table(numeric_data$Propiedad)
## 
##   0   1 
## 531 469
# Modelo
modelo <- glm(Propiedad ~ ., data = numeric_data, family = binomial(link = "logit"))

summary(modelo)
## 
## Call:
## glm(formula = Propiedad ~ ., family = binomial(link = "logit"), 
##     data = numeric_data)
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -9.809e+00  1.945e+00  -5.043 4.59e-07 ***
## RentaMedia2021             1.293e-04  2.274e-05   5.687 1.29e-08 ***
## PorcentajeJoven2021        1.932e-02  2.166e-02   0.892   0.3725    
## PorcentajeJovenDif         6.972e-02  3.514e-02   1.984   0.0472 *  
## PorcentajeVacias2021       8.358e-02  1.234e-02   6.771 1.27e-11 ***
## PorcentajeSecundarias2021  4.906e-02  8.625e-03   5.688 1.29e-08 ***
## PorcentajeEspañola2021     2.950e-02  1.201e-02   2.457   0.0140 *  
## PorcentajeProtegidas2021  -9.434e+00  3.883e+00  -2.430   0.0151 *  
## ProtegidaDif              -6.475e+00  1.616e+00  -4.005 6.19e-05 ***
## PrecioAlquilerM2           7.639e-02  6.327e-02   1.207   0.2273    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1382.4  on 999  degrees of freedom
## Residual deviance: 1062.2  on 990  degrees of freedom
## AIC: 1082.2
## 
## Number of Fisher Scoring iterations: 5
# Matriz de confusión
prob_pred <- predict(modelo, type = "response")
clases_pred <- ifelse(prob_pred > 0.5, 1, 0)

confusion_matrix <-table(Real = numeric_data$Propiedad, Predicho = clases_pred)
confusion_matrix
##     Predicho
## Real   0   1
##    0 417 114
##    1 173 296
# Accuracy
sum(diag(confusion_matrix)) / sum(confusion_matrix)
## [1] 0.713
# Sensibilidad
confusion_matrix[2, 2] / sum(confusion_matrix[2, ])
## [1] 0.6311301
# Especificidad
confusion_matrix[1, 1] / sum(confusion_matrix[1, ])
## [1] 0.7853107

Evolution of Tenure Regimes in target population plot

# Calcular las medias para cada régimen en 2011 y 2021
media_2011 <- propiedad %>%
  summarise(
    Property11 = mean(Propiedad11Mun, na.rm = TRUE),
    Rent11 = mean(Alquiler11Mun, na.rm = TRUE),
    Other11 = mean(Otro11Mun, na.rm = TRUE))

media_2021 <- propiedad %>%
  summarise(
    Property21 = mean(Propiedad21Mun, na.rm = TRUE),
    Rent21 = mean(Alquiler21Mun, na.rm = TRUE),
    Other21 = mean(Otro21Mun, na.rm = TRUE)
  )

# Combinar los datos en un solo dataframe
medias <- tibble(
  Year = c(2011, 2021),
  Property = c(media_2011$Property11, media_2021$Property21),
  Rent = c(media_2011$Rent11, media_2021$Rent21),
  Other = c(media_2011$Other11, media_2021$Other21)
)

# Transformar los datos a formato largo y modificar el orden de "Regime"
medias_long <- medias %>%
  pivot_longer(cols = c(Property, Rent, Other),
               names_to = "Regime",
               values_to = "Value") %>%
  mutate(Regime = factor(Regime, levels = c("Property", "Rent", "Other")))  # Modificar el orden de la variable "Regime" para la leyenda

evo <- ggplot(medias_long, aes(x = Year, y = Value, color = Regime, group = Regime)) +
  geom_line(size = 0.7) +
  geom_point(size = 2, stroke = 0.5) +  # Ajustar el ancho de la línea de los puntos
  geom_text(aes(label = paste0(round(Value, 1), "%")), vjust = -0.8, family = "Calibri", size = 4.2, color = "black") +  # Agregar el signo de porcentaje y cambiar el color a negro
  scale_color_manual(values=c("Property"="#ff7767", "Rent"="#19CF8B", "Other" = "#FFA500")) +  # Cambiar el color de "Otro"
  labs(title="Evolution of tenure regimes in Spain in 2011-2021 (targeted population)",  # Eliminar el título
       caption= "Source: Self-elaboration from Census data",  # Eliminar la leyenda
       x=NULL,
       y=NULL,
       color=NULL) +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  scale_x_continuous(breaks = c(2011, 2021), limits = c(2011, 2021)) +  # Establecer los límites y las marcas del eje x
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 6.5, size = 16, family = "Times New Roman", color = "black", face = "bold"),
    plot.caption = element_text(hjust = -0.1, vjust=-5, family = "Times New Roman", color="#555555", size = 12),  # Cambiar la fuente de la leyenda
    axis.title = element_blank(),
    axis.text.x = element_text(color="#333333", size = 12, family = "Times New Roman"),  # Ajustar el tamaño de la fuente del texto del eje x
    axis.text.y = element_text(color="#333333", size = 12, family = "Times New Roman"),  # Ajustar el tamaño de la fuente del texto del eje y
    axis.line.x = element_line(size = 0.5, color = "black"),
    axis.line.y = element_line(size = 0.5, color = "black"),
legend.text = element_text(margin = margin(t = 0, r = 0.5, b = 0, l = 0), color="#333333", family = "Calibri", size = 12),
    panel.background = element_rect(fill = "white", color = NA),  # Remove border
    panel.grid.major = element_line(color = "grey80", size = 0.5),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),  # Remove panel border
    plot.margin = unit(c(1, 0.5, 1.2, 1), "cm")) +
  guides(color = guide_legend(override.aes = list(shape = "square", size = 3, linetype = "blank")))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggsave("evolucion.png", plot = evo, width = 8, height = 5.7, bg = "white")

Scatterplot of Municipalities and Dependent Variable

municipios <- municipios |> 
    mutate(MUN_LITERAL = ifelse(grepl("\\d", MUN_LITERAL), paste(MUN_LITERAL, PROV_LITERAL), MUN_LITERAL))

names = municipios$MUN_LITERAL
prov = municipios$PROV_LITERAL
ccaa = municipios$CCAA

library(ggplot2)
scatterplot <- ggplot(municipios, aes(x = PropiedadDifMun, y = MUN_LITERAL,  color = CCAA)) +
  geom_point(aes(text = paste(MUN_LITERAL, "PropiedadDifMun:", PropiedadDifMun))) +
  geom_text(aes(label = MUN_LITERAL), hjust = 1.5, vjust = 0.5, size = 2.5) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(title = "Scatterplot Interactivo de Municipios",
       x = "PropiedadDifMun",
       y = "")
## Warning in geom_point(aes(text = paste(MUN_LITERAL, "PropiedadDifMun:", :
## Ignoring unknown aesthetics: text
# Convertirlo en un gráfico interactivo con ggplotly
ggplotly(scatterplot, tooltip = "text")